# Loading necessary library files
library(shiny)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(leaflet)
library(leaflet.extras)
## Warning: package 'leaflet.extras' was built under R version 4.2.3
# Read the CSV export, drop the listed columns, then discard every row that
# still contains a missing value.
data <- read.csv("DataCoSupplyChainDataset.csv") %>%
  subset(
    select = -c(
      Customer.Email, Customer.Password, Order.Item.Cardprod.Id,
      Order.Zipcode, Product.Card.Id, Product.Description, Product.Image
    )
  ) %>%
  na.omit()

# Summary statistics for the shipping-day, benefit and sales columns
summary(
  data[
    ,
    c(
      "Days.for.shipping..real.",
      "Days.for.shipment..scheduled.",
      "Benefit.per.order",
      "Sales.per.customer"
    )
  ]
)
##  Days.for.shipping..real. Days.for.shipment..scheduled. Benefit.per.order 
##  Min.   :0.000            Min.   :0.000                 Min.   :-4274.98  
##  1st Qu.:2.000            1st Qu.:2.000                 1st Qu.:    7.00  
##  Median :3.000            Median :4.000                 Median :   31.52  
##  Mean   :3.498            Mean   :2.932                 Mean   :   21.98  
##  3rd Qu.:5.000            3rd Qu.:4.000                 3rd Qu.:   64.80  
##  Max.   :6.000            Max.   :4.000                 Max.   :  911.80  
##  Sales.per.customer
##  Min.   :   7.49   
##  1st Qu.: 104.38   
##  Median : 163.99   
##  Mean   : 183.11   
##  3rd Qu.: 247.40   
##  Max.   :1939.99
# Histogram of the Sales.per.customer column, with bars shaded from blue to
# red according to their position on the x-axis.
#
# data: data frame containing a numeric Sales.per.customer column.
# bins: number of histogram bins. 30 is the value stat_bin() falls back to;
#       passing it explicitly avoids the "Pick better value" message.
# Returns a ggplot object.
sales_histogram <- function(data, bins = 30) {
  # x must be the sales column so the data matches the axis label and title;
  # an identifier column such as Customer.Id would not be a sales distribution.
  ggplot(data, aes(x = Sales.per.customer, fill = after_stat(x))) +
    geom_histogram(bins = bins) +
    scale_fill_gradient(low = "blue", high = "red") +
    labs(x = "Sales per customer", y = "Count", title = "Distribution of Sales per Customer")
}

sales_histogram(data)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Bar chart counting the rows that fall under each Delivery.Status value.
#
# data: data frame with a Delivery.Status column.
# Returns a ggplot object.
create_delivery_status_histogram <- function(data) {
  status_axis <- aes(x = Delivery.Status)
  chart_labels <- labs(
    x = "Delivery Status",
    y = "Count",
    title = "Delivery Status Distribution"
  )

  ggplot(data, status_axis) +
    geom_bar() +
    chart_labels
}
create_delivery_status_histogram(data)

# Bar chart of row counts per Delivery.Status, drawn in a single fixed colour.
#
# data: data frame with a Delivery.Status column.
# Returns a ggplot object.
delivery_status_plot <- function(data) {
  # The bars use one constant fill, so no colour/fill scale is attached: a
  # manual colour scale has no effect unless a colour aesthetic is mapped.
  ggplot(data, aes(x = Delivery.Status)) +
    geom_bar(fill = "#69b3a2") +
    labs(x = "Delivery Status", y = "Count", title = "Delivery Status Distribution")
}
delivery_status_plot(data)

# Bar chart of the product categories with the most late deliveries.
#
# data: data frame with Delivery.Status and Category.Name columns.
# Returns a ggplot object with bars ordered from most to fewest late
# deliveries.
late_delivery_category_plot <- function(data) {
  # Count rows whose status is exactly "Late delivery", per category, and keep
  # the 10 largest counts. The ranking column is named explicitly, and ties at
  # the cut-off are kept (so more than 10 bars can appear when counts tie).
  category_counts <- data %>%
    filter(Delivery.Status == "Late delivery") %>%
    group_by(Category.Name) %>%
    summarise(count = n(), .groups = "drop") %>%
    slice_max(count, n = 10)

  # A discrete x-axis is sorted alphabetically unless reordered, so order the
  # categories by descending count to make the ranking visible.
  ggplot(
    category_counts,
    aes(x = reorder(Category.Name, -count), y = count, fill = Category.Name)
  ) +
    geom_col() +
    labs(x = "Category Name", y = "Count of Late Deliveries", title = "Top 10 Categories by Late Delivery Count") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
late_delivery_category_plot(data)
## Selecting by count

# Bar chart of the order countries with the most late deliveries.
#
# data: data frame with Delivery.Status and Order.Country columns.
# Returns a ggplot object with bars ordered from most to fewest late
# deliveries.
plot_late_delivery_by_country <- function(data) {
  # Count rows whose status is exactly "Late delivery", per country, and keep
  # the 10 largest counts. The ranking column is named explicitly, and ties at
  # the cut-off are kept (so more than 10 bars can appear when counts tie).
  country_counts <- data %>%
    filter(Delivery.Status == "Late delivery") %>%
    group_by(Order.Country) %>%
    summarise(count = n(), .groups = "drop") %>%
    slice_max(count, n = 10)

  # A discrete x-axis is sorted alphabetically unless reordered, so order the
  # countries by descending count to make the ranking visible.
  ggplot(
    country_counts,
    aes(x = reorder(Order.Country, -count), y = count, fill = Order.Country)
  ) +
    geom_col() +
    labs(x = "Country", y = "Count of Late Deliveries", title = "Top 10 Countries by Late Delivery Count") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
plot_late_delivery_by_country(data)
## Selecting by count

# Boxplot of Sales for each Category.Name, one filled box per category.
#
# data: data frame with Category.Name and Sales columns.
# Returns a ggplot object.
create_sales_boxplot <- function(data) {
  sales_by_category <- aes(
    x = Category.Name,
    y = Sales,
    fill = Category.Name
  )
  canvas <- ggplot(data, sales_by_category)

  canvas +
    geom_boxplot() +
    labs(title = "Category Name vs. Sales")
}
create_sales_boxplot(data)

# Boxplot of Late_delivery_risk for each Shipping.Mode, one filled box per
# mode.
#
# data: data frame with Shipping.Mode and Late_delivery_risk columns.
# Returns a ggplot object.
# NOTE(review): if Late_delivery_risk is a 0/1 indicator, a boxplot conveys
# very little - confirm the column's range and consider a proportion chart.
create_delivery_boxplot <- function(data) {
  risk_by_mode <- aes(
    x = Shipping.Mode,
    y = Late_delivery_risk,
    fill = Shipping.Mode
  )
  canvas <- ggplot(data, risk_by_mode)

  canvas +
    geom_boxplot() +
    labs(title = "Shipping Mode vs. Late Delivery Risk")
}
create_delivery_boxplot(data)

# Interactive bar chart of total Sales per Department.Name.
#
# data: data frame with Department.Name and a numeric Sales column.
# Returns the plotly object produced by ggplotly().
plot_sales_by_department <- function(data) {
  # Aggregate sales by department
  sales_by_department <- data %>%
    group_by(Department.Name) %>%
    summarise(total_sales = sum(Sales), .groups = "drop")

  # `text` is not drawn by the bars; it only carries the hover label.
  p <- ggplot(
    sales_by_department,
    aes(
      x = Department.Name, y = total_sales, fill = Department.Name,
      text = paste("Department: ", Department.Name, "<br>", "Sales: $",
                   scales::comma(total_sales))
    )
  ) +
    geom_col() +
    labs(title = "Total Sales by Department", x = "Department", y = "Sales") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

  # Show only the custom label on hover; without `tooltip`, ggplotly lists
  # every mapped aesthetic and repeats the same values.
  ggplotly(p, tooltip = "text")
}
plot_sales_by_department(data)
# Interactive bar chart of the 10 Order.Country values with the most rows.
#
# data: data frame with an Order.Country column.
# Returns the plotly object produced by ggplotly(), bars ordered from the
# highest to the lowest count.
top_10_orders_by_country <- function(data) {
  # Row count per country, keeping exactly the 10 largest.
  orders_by_country <- data %>%
    group_by(Order.Country) %>%
    summarise(count = n(), .groups = "drop") %>%
    slice_max(count, n = 10, with_ties = FALSE)

  # Reorder the discrete x-axis by descending count; otherwise it is drawn
  # alphabetically and the ranking is lost. `text` only feeds the hover label.
  p <- ggplot(
    orders_by_country,
    aes(
      x = reorder(Order.Country, -count), y = count, fill = Order.Country,
      text = paste("Country: ", Order.Country, "<br>", "Orders: ", count)
    )
  ) +
    geom_col() +
    labs(title = "Top 10 Order Countries by Customer Orders") +
    xlab("Order Country") +
    ylab("Customer Orders") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

  # Show only the custom label on hover instead of every mapped aesthetic.
  ggplotly(p, tooltip = "text")
}
top_10_orders_by_country(data)
# Interactive bar chart of the number of rows per Order.Region.
#
# data: data frame with an Order.Region column.
# Returns the plotly object produced by ggplotly(), bars ordered from the
# highest to the lowest count.
plot_order_region_count <- function(data) {
  # Row count per order region.
  order_region_count <- data %>%
    group_by(Order.Region) %>%
    summarise(Count = n(), .groups = "drop")

  # Sorting the data frame does not affect a discrete axis, so the ordering is
  # applied to the x aesthetic itself. `text` only feeds the hover label.
  region_plot <- ggplot(
    order_region_count,
    aes(
      x = reorder(Order.Region, -Count), y = Count, fill = Order.Region,
      text = paste("Order Region: ", Order.Region, "<br>", "Order Count: ", Count)
    )
  ) +
    geom_col() +
    labs(title = "Order Regions by Count of Orders from Customers") +
    xlab("Order Region") +
    ylab("Order Count") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    scale_fill_viridis_d()

  # Show only the custom label on hover instead of every mapped aesthetic.
  ggplotly(region_plot, tooltip = "text")
}
plot_order_region_count(data)
# Interactive bar chart of the 20 customers with the highest total Sales.
#
# data: data frame with Customer.Id, Customer.Fname, Customer.Lname and a
#       numeric Sales column.
# Returns the plotly object produced by ggplotly(), bars ordered from the
# highest to the lowest total.
top_customers_sales_plot <- function(data) {
  # Aggregate to exactly one row per customer, taking the name from the
  # customer's first row. Joining the names back from the un-aggregated data
  # would repeat each customer once per row, and the stacked bars would then
  # overstate total sales by that factor.
  top_customers <- data %>%
    group_by(Customer.Id) %>%
    summarise(
      total_sales = sum(Sales),
      Customer.Fname = Customer.Fname[1],
      Customer.Lname = Customer.Lname[1],
      .groups = "drop"
    ) %>%
    arrange(desc(total_sales)) %>%
    head(20) %>%
    mutate(
      CustomerName = paste(Customer.Fname, Customer.Lname, sep = " "),
      # Different customers can share a name; append the id in that case so
      # their bars are not merged into a single stacked bar.
      shared_name = CustomerName %in% CustomerName[duplicated(CustomerName)],
      CustomerName = ifelse(
        shared_name,
        paste0(CustomerName, " (", Customer.Id, ")"),
        CustomerName
      )
    )

  # `text` only feeds the hover label.
  sales_plot <- ggplot(
    top_customers,
    aes(
      x = reorder(CustomerName, -total_sales), y = total_sales,
      fill = CustomerName,
      text = paste("Customer Name: ", CustomerName, "<br>", "Total Sales: $", total_sales)
    )
  ) +
    geom_col() +
    labs(title = "Top 20 Customers by Sales") +
    xlab("Customer Name") +
    ylab("Total Sales") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

  # Show only the custom label on hover instead of every mapped aesthetic.
  ggplotly(sales_plot, tooltip = "text")
}
top_customers_sales_plot(data)
# Build a plotting function for sales/profit totals per month or quarter.
#
# data: data frame with order.date..DateOrders. (text parsed with the format
#       "%m/%d/%Y %H:%M"), Sales and Order.Profit.Per.Order columns.
#
# Returns a function(input_sales_profit, input_year, input_month_quarter):
#   input_sales_profit  - "Sales" plots total sales; any other value plots
#                         total profit.
#   input_year          - year to keep (compared with the parsed order year).
#   input_month_quarter - "month" or "quarter"; anything else is an error.
# The returned function yields a ggplot bar chart. The monthly and quarterly
# totals are computed once here and reused by every call.
generate_sales_profit_plot <- function(data) {
  # Parse the order timestamp; as.Date() keeps only the calendar date.
  order_dates <- as.Date(data$order.date..DateOrders., format = "%m/%d/%Y %H:%M")

  dated <- data %>%
    mutate(
      year = lubridate::year(order_dates),
      month = lubridate::month(order_dates, label = TRUE),
      quarter = base::quarters(order_dates)
    )

  # Sum sales and profit within the current groups and return an ungrouped
  # result (avoids the "grouped output" message from summarise()).
  summarise_totals <- function(grouped) {
    summarise(
      grouped,
      total_sales = sum(Sales),
      total_profit = sum(Order.Profit.Per.Order),
      .groups = "drop"
    )
  }

  monthly_totals <- dated %>%
    group_by(month, year) %>%
    summarise_totals()

  quarterly_totals <- dated %>%
    group_by(quarter, year) %>%
    summarise_totals()

  generate_plot <- function(input_sales_profit, input_year, input_month_quarter) {
    # Fail early with a clear message on an unsupported period.
    input_month_quarter <- match.arg(input_month_quarter, c("month", "quarter"))

    period_totals <- if (input_month_quarter == "month") {
      monthly_totals
    } else {
      quarterly_totals
    }
    filtered_data <- period_totals %>%
      filter(year == input_year)

    # Pick the measure once instead of duplicating the whole plot per branch.
    show_sales <- input_sales_profit == "Sales"
    y_column <- if (show_sales) "total_sales" else "total_profit"
    y_label <- if (show_sales) "Sales" else "Profit"

    # .data[[name]] looks the column up by its string name inside aes().
    ggplot(
      filtered_data,
      aes(x = .data[[input_month_quarter]], y = .data[[y_column]])
    ) +
      geom_col() +
      labs(
        title = paste("Total", y_label, "by", input_month_quarter, "in", input_year),
        x = input_month_quarter, y = y_label
      )
  }

  generate_plot
}
sales_profit_plot <- generate_sales_profit_plot(data)
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'quarter'. You can override using the
## `.groups` argument.
# Generate sales plots for year 2015, monthly and quarterly
sales_profit_plot("Sales", 2015, "month")

sales_profit_plot("Sales", 2015, "quarter")

# Generate sales plot for year 2016  monthly and quarterly 
sales_profit_plot("Sales", 2016, "month")

sales_profit_plot("Sales", 2016, "quarter")

# Generate sales plot for year 2017  monthly and quarterly 
sales_profit_plot("Sales", 2017, "month")

sales_profit_plot("Sales", 2017, "quarter")

# Generate sales plots for year 2018, monthly and quarterly
sales_profit_plot("Sales", 2018, "month")

sales_profit_plot("Sales", 2018, "quarter")